import os
import win32com.client as win32
import csv

def find_ppt_files(directory):
    """Recursively collect the paths of all ``.ppt`` files under *directory*.

    Args:
        directory: Root folder to traverse with ``os.walk``.

    Returns:
        A list of paths, each built by joining the containing folder with the
        file name, for every file whose name ends with ``'.ppt'``. The suffix
        test is case-sensitive, so names such as ``X.PPT`` or ``x.pptx`` are
        not included.
    """
    matches = []
    for folder, _subfolders, names in os.walk(directory):
        matches.extend(
            os.path.join(folder, name)
            for name in names
            if name.endswith('.ppt')
        )
    return matches

def read_ppt(file_path):
    """Extract the text of every slide in a PowerPoint file via COM automation.

    Args:
        file_path: Path of the presentation to read. It is converted to an
            absolute path before being handed to PowerPoint, because the COM
            server does not share this process's working directory.

    Returns:
        A list with one entry per slide, in slide order. Each entry is a list
        holding the text of every shape on that slide that has a text frame
        containing text.

    Raises:
        Whatever COM error PowerPoint reports (e.g. when the file cannot be
        opened). The presentation is closed on the way out in every case.
    """
    mso_true, mso_false = -1, 0  # MsoTriState values expected by the COM API
    powerpoint = win32.gencache.EnsureDispatch('PowerPoint.Application')
    try:
        # The window is suppressed per presentation (WithWindow) instead of
        # hiding the whole application, a request PowerPoint may refuse.
        presentation = powerpoint.Presentations.Open(
            os.path.abspath(file_path),
            ReadOnly=mso_true,
            WithWindow=mso_false,
        )
        try:
            content = []
            for slide in presentation.Slides:
                content.append([
                    shape.TextFrame.TextRange.Text
                    for shape in slide.Shapes
                    if shape.HasTextFrame and shape.TextFrame.HasText
                ])
            return content
        finally:
            presentation.Close()
    finally:
        # Dispatch may attach to an instance the user already has running;
        # only shut PowerPoint down when nothing else is open in it.
        if powerpoint.Presentations.Count == 0:
            powerpoint.Quit()

def write_ppt(file_path, content):
    """Create a new presentation from per-slide text and save it.

    Args:
        file_path: Destination path. It is converted to an absolute path
            before saving, because the COM server does not share this
            process's working directory.
        content: Iterable of slides, each an iterable of text strings (the
            structure ``read_ppt`` returns). One slide is created per entry.

    For each slide the first string becomes the title and any remaining
    strings are written to the body placeholder, one paragraph per string.
    A slide whose entry is empty is still created, with no text.

    Raises:
        Whatever COM error PowerPoint reports (e.g. when saving fails). The
        presentation is closed on the way out in every case.
    """
    pp_layout_text = 2  # PpSlideLayout.ppLayoutText: title + body text
    mso_false = 0       # MsoTriState.msoFalse
    powerpoint = win32.gencache.EnsureDispatch('PowerPoint.Application')
    try:
        # Create the presentation without a window instead of hiding the
        # whole application, a request PowerPoint may refuse.
        presentation = powerpoint.Presentations.Add(WithWindow=mso_false)
        try:
            for slide_content in content:
                slide = presentation.Slides.Add(
                    presentation.Slides.Count + 1, pp_layout_text
                )
                texts = list(slide_content)
                if not texts:
                    continue
                slide.Shapes.Title.TextFrame.TextRange.Text = texts[0]
                if len(texts) > 1:
                    # Placeholder 2 of the text layout is the body; '\r'
                    # separates paragraphs inside a PowerPoint text range.
                    body = slide.Shapes.Placeholders.Item(2)
                    body.TextFrame.TextRange.Text = '\r'.join(texts[1:])
            presentation.SaveAs(os.path.abspath(file_path))
        finally:
            presentation.Close()
    finally:
        # Dispatch may attach to an instance the user already has running;
        # only shut PowerPoint down when nothing else is open in it.
        if powerpoint.Presentations.Count == 0:
            powerpoint.Quit()

def main(directory=r'C:\Users\王相辰\Desktop\大一大二_论文+一些作业',
         csv_path='ppt_documents_data.csv'):
    """Copy the text of every ``.ppt`` under *directory* and log it to a CSV.

    For each file returned by ``find_ppt_files`` the slide text is read with
    ``read_ppt``, written to a sibling file whose name has ``_new`` inserted
    before the extension, and recorded as one CSV row.

    Args:
        directory: Root folder to scan. Defaults to the original hard-coded
            location.
        csv_path: Output CSV path. Defaults to ``ppt_documents_data.csv`` in
            the current working directory.

    A file that fails to read or write is reported and skipped so that one
    bad presentation does not abort the batch; text that was read before the
    failure is still written to the CSV.
    """
    ppt_files = find_ppt_files(directory)

    all_data = []

    for ppt_file in ppt_files:
        # Insert the suffix before the extension only; str.replace would also
        # rewrite any '.ppt' occurring earlier in the path.
        stem, extension = os.path.splitext(ppt_file)
        new_ppt_file = f"{stem}_new{extension}"
        try:
            content = read_ppt(ppt_file)
            all_data.append(['.ppt', ppt_file, content])
            write_ppt(new_ppt_file, content)
        except Exception as exc:  # batch boundary: report and keep going
            print(f"处理 {ppt_file} 失败，已跳过: {exc}")
            continue
        print(f"处理并保存了 {ppt_file} 到 {new_ppt_file}")

    with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['File Type', 'File Path', 'Content'])
        for file_type, path, slides in all_data:
            # Flatten to text: tab between shapes, newline between slides.
            content_str = '\n'.join('\t'.join(map(str, slide)) for slide in slides)
            writer.writerow([file_type, path, content_str])

    print(f"所有数据已成功保存到 {csv_path} 文件中")

# Run the batch job only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()